library(survival)
library(parallel)
library(data.table)
load("data/analysis_data.RData")
source("R/00-functions.R")
RNGkind("L'Ecuyer-CMRG")
set.seed(174071644)

# main dataset ----
data_main <- respondent_candidate_data[main == 1]
n_main <- data_main[, length(unique(subject_id))]
data_main[, subject_id := rep(1:n_main, each = 7)]
model_main <- coxph(
  Surv(8-ranking, status) ~
    preferences * electability_last +
    electability * electability_last +
    expected_utility * electability_last +
    strata(subject_id) - electability_last,
  data = data_main,
  id = subject_id)
ids <- sort(unique(data_main$subject_id))
data_list_main <- lapply(ids, function(x) {
  data_main[subject_id == x]
})
n <- length(data_list_main)
boot <- function(i) {
  boot_i <- sample(n, replace = TRUE)
  data <- rbindlist(lapply(1:n, function(i) {
    out <- copy(data_list_main[[boot_i[i]]])
    out[, subject_id := i]
    out
  }))
  coxph(
    Surv(8-ranking, status) ~
      preferences * electability_last +
      electability * electability_last +
      expected_utility * electability_last +
      strata(subject_id) - electability_last,
    data = data,
    id = subject_id)
}
boots_main <- mclapply(1:1000, boot, mc.cores = 6)
coefs_main <- rbindlist(lapply(boots_main,
  function(x) as.data.table(rbind(coef(x)))))
coefs_main <- coefs_main[, .(
  variable = c("Preferences", "Electability", "Expected Utility",
    "Preferences $\\times$ Electability Last",
    "Electability $\\times$ Electability Last",
    "Expected Utility $\\times$ Electability Last"),
  est = c(
    mean(`preferences`),
    mean(`electability`),
    mean(`expected_utility`),
    mean(`preferences:electability_last`),
    mean(`electability:electability_last`),
    mean(`expected_utility:electability_last`)),
  q025 = c(
    quantile(`preferences`, .025),
    quantile(`electability`, .025),
    quantile(`expected_utility`, .025),
    quantile(`preferences:electability_last`, .025),
    quantile(`electability:electability_last`, .025),
    quantile(`expected_utility:electability_last`, .025)),
  q975 = c(
    quantile(`preferences`, .975),
    quantile(`electability`, .975),
    quantile(`expected_utility`, .975),
    quantile(`preferences:electability_last`, .975),
    quantile(`electability:electability_last`, .975),
    quantile(`expected_utility:electability_last`, .975))
)]
for (i in 1:nrow(coefs_main)) {
  coefs_main[i, cat(
    variable,
    "&$", sprintf("%.03f", est), "$",
    "&$[", sprintf("%.03f", q025), ",", sprintf("%.03f", q975), "]$",
    "\\\\\n")]
}

# lp <- predict(model_main)
# all_rankings <- rbind(1:7, permute::allPerms(7))
# calculate_expected_rankings <- function(linear_predictor) {
#   do.call(c, mclapply(1:n_main, function(i) {
#     scores <- lp[((i - 1) * 7 + 1):(i * 7)]
#     probs <- matrix(NA, nrow = 5040, ncol = 7)
#     fill_probs_column_k <- function(k) {
#       if (k == 1) {
#         p1 <- exp(scores) / sum(exp(scores))
#         p1[all_rankings[, 1]]
#       } else {
#         sapply(1:5040, function(row) {
#           remaining_candidates <- setdiff(1:7, all_rankings[row, 1:(k-1)])
#           exp_scores <- exp(scores[remaining_candidates])
#           pk <- exp_scores / sum(exp_scores)
#           pk[match(all_rankings[row, k], remaining_candidates)]
#         })
#       }
#     }
#     for (k in 1:7) {
#       probs[, k] <- fill_probs_column_k(k)
#     }
#     probs <- apply(probs, 1, prod)
#     expected_ranking <- sapply(1:7, function(candidate) {
#       sum(sapply(1:5040, function(row) {
#         candidate_rank <- which(all_rankings[row, ] == candidate)
#         probs[row] * candidate_rank
#       }))
#     })
#     expected_ranking
#   }, mc.cores = 6))
# }
# expected_rankings <- calculate_expected_rankings(lp)
# difference in average linear predictor for women/men by elicitation order

# full dataset ----
data_full <- respondent_candidate_data
data_full[, subject_id := rep(1:1211, each = 7)]
model_full <- coxph(
  Surv(8-ranking, status) ~
    preferences * electability_last +
    electability * electability_last +
    expected_utility * electability_last +
    strata(subject_id) - electability_last,
  data = data_full,
  id = subject_id)
ids <- sort(unique(data_full$subject_id))
data_list_full <- lapply(ids, function(x) {
  data_full[subject_id == x]
})
n <- length(data_list_full)
boot <- function(i) {
  boot_i <- sample(n, replace = TRUE)
  data <- rbindlist(lapply(1:n, function(i) {
    out <- copy(data_list_full[[boot_i[i]]])
    out[, subject_id := i]
    out
  }))
  coxph(
    Surv(8-ranking, status) ~
      preferences * electability_last +
      electability * electability_last +
      expected_utility * electability_last +
      strata(subject_id) - electability_last,
    data = data,
    id = subject_id)
}
boots_full <- mclapply(1:1000, boot, mc.cores = 6)
coefs_full <- rbindlist(lapply(boots_full,
  function(x) as.data.table(rbind(coef(x)))))
coefs_full[, .(
  type = c("preferences", "electability", "expected utility"),
  est = c(
    mean(`preferences:electability_last`),
    mean(`electability:electability_last`),
    mean(`expected_utility:electability_last`)),
  q025 = c(
    quantile(`preferences:electability_last`, .025),
    quantile(`electability:electability_last`, .025),
    quantile(`expected_utility:electability_last`, .025)),
  q975 = c(
    quantile(`preferences:electability_last`, .975),
    quantile(`electability:electability_last`, .975),
    quantile(`expected_utility:electability_last`, .975))
)]
# difference in average linear predictor for women/men by elicitation order
lps_full <- do.call(cbind, lapply(boots_full, function(x) {
  predict(x, newdata = data_full)
}))
women_full <- rep(c(F, T, F, F, F, T, F), 1211)
quantile(colMeans(lps_full[women_full, ]), c(.025, .975))
quantile(colMeans(lps_full[!women_full, ]), c(.025, .975))

# non-reversed rankings ----
# main dataset ----
data_main_rev <- respondent_candidate_data[main == 1]
data_main_rev[, subject_id := rep(1:833, each = 7)]
model_main_rev <- coxph(
  Surv(ranking, status) ~
    preferences * electability_last +
    electability * electability_last +
    expected_utility * electability_last +
    strata(subject_id) - electability_last,
  data = data_main_rev,
  id = subject_id)
ids <- sort(unique(data_main_rev$subject_id))
data_list_main_rev <- lapply(ids, function(x) {
  data_main_rev[subject_id == x]
})
n <- length(data_list_main_rev)
boot <- function(i) {
  boot_i <- sample(n, replace = TRUE)
  data <- rbindlist(lapply(1:n, function(i) {
    out <- copy(data_list_main_rev[[boot_i[i]]])
    out[, subject_id := i]
    out
  }))
  coxph(
    Surv(ranking, status) ~
      preferences * electability_last +
      electability * electability_last +
      expected_utility * electability_last +
      strata(subject_id) - electability_last,
    data = data,
    id = subject_id)
}
boots_main_rev <- mclapply(1:1000, boot, mc.cores = 6)
coefs_main_rev <- rbindlist(lapply(boots_main_rev,
  function(x) as.data.table(rbind(coef(x)))))
coefs_main_rev <- coefs_main_rev[, .(
  variable = c("Preferences", "Electability", "Expected Utility",
    "Preferences $\\times$ Electability Last",
    "Electability $\\times$ Electability Last",
    "Expected Utility $\\times$ Electability Last"),
  est = c(
    mean(`preferences`),
    mean(`electability`),
    mean(`expected_utility`),
    mean(`preferences:electability_last`),
    mean(`electability:electability_last`),
    mean(`expected_utility:electability_last`)),
  q025 = c(
    quantile(`preferences`, .025),
    quantile(`electability`, .025),
    quantile(`expected_utility`, .025),
    quantile(`preferences:electability_last`, .025),
    quantile(`electability:electability_last`, .025),
    quantile(`expected_utility:electability_last`, .025)),
  q975 = c(
    quantile(`preferences`, .975),
    quantile(`electability`, .975),
    quantile(`expected_utility`, .975),
    quantile(`preferences:electability_last`, .975),
    quantile(`electability:electability_last`, .975),
    quantile(`expected_utility:electability_last`, .975))
)]
# difference in average linear predictor for women/men by elicitation order
lps_main_rev <- do.call(cbind, lapply(boots_main_rev, function(x) {
  predict(x, newdata = data_main_rev)
}))
women_main_rev <- rep(c(F, T, F, F, F, T, F), 833)
quantile(colMeans(lps_main_rev[women_main_rev, ]), c(.025, .975))
quantile(colMeans(lps_main_rev[!women_main_rev, ]), c(.025, .975))

# full dataset ----
data_full_rev <- respondent_candidate_data
data_full_rev[, subject_id := rep(1:1211, each = 7)]
model_full_rev <- coxph(
  Surv(ranking, status) ~
    preferences * electability_last +
    electability * electability_last +
    expected_utility * electability_last +
    strata(subject_id) - electability_last,
  data = data_full_rev,
  id = subject_id)
ids <- sort(unique(data_full_rev$subject_id))
data_list_full_rev <- lapply(ids, function(x) {
  data_full_rev[subject_id == x]
})
n <- length(data_list_full_rev)
boot <- function(i) {
  boot_i <- sample(n, replace = TRUE)
  data <- rbindlist(lapply(1:n, function(i) {
    out <- copy(data_list_full_rev[[boot_i[i]]])
    out[, subject_id := i]
    out
  }))
  coxph(
    Surv(8-ranking, status) ~
      preferences * electability_last +
      electability * electability_last +
      expected_utility * electability_last +
      strata(subject_id) - electability_last,
    data = data,
    id = subject_id)
}
boots_full_rev <- mclapply(1:1000, boot, mc.cores = 6)
coefs_full_rev <- rbindlist(lapply(boots_full_rev,
  function(x) as.data.table(rbind(coef(x)))))
coefs_full_rev[, .(
  type = c("preferences", "electability", "expected utility"),
  est = c(
    mean(`preferences:electability_last`),
    mean(`electability:electability_last`),
    mean(`expected_utility:electability_last`)),
  q025 = c(
    quantile(`preferences:electability_last`, .025),
    quantile(`electability:electability_last`, .025),
    quantile(`expected_utility:electability_last`, .025)),
  q975 = c(
    quantile(`preferences:electability_last`, .975),
    quantile(`electability:electability_last`, .975),
    quantile(`expected_utility:electability_last`, .975))
)]
# difference in average linear predictor for women/men by elicitation order
lps_full_rev <- do.call(cbind, lapply(boots_full_rev, function(x) {
  predict(x, newdata = data_full_rev)
}))
women_full_rev <- rep(c(F, T, F, F, F, T, F), 1211)
quantile(colMeans(lps_full_rev[women_full_rev, ]), c(.025, .975))
quantile(colMeans(lps_full_rev[!women_full_rev, ]), c(.025, .975))

# comparison ----
AIC(model_main)
AIC(model_main_rev)

AIC(model_full)
AIC(model_full_rev)

